Flowchart

library(GENIE3)
library(doParallel)
library(igraph)
library(tidyverse)
library(DT)
library(reticulate)
library(learn2count)
library(rbenchmark)
library(reshape2)
library(gridExtra)
library(DiagrammeR)
library(pROC)
library(JRF)
library(DiagrammeRsvg)
library(rsvg)
library(RColorBrewer)
library(rbenchmark)
library(ZILGM)
library(patchwork)
library(scales)
library(INetTool)

use_python("/usr/bin/python3", required = TRUE)
arboreto <- import("arboreto.algo")
pandas <- import("pandas")
numpy <- import("numpy")

time <- list()
source("generate_adjacency.R")
source("symmetrize.R")
source("pscores.R")
source("plotg.R")
source("compare_consensus.R")
source("create_consensus.R")
source("earlyj.R")
source("plotROC.R")
source("cutoff_adjacency.R")
source("infer_networks.R")
grViz_output <- DiagrammeR::grViz("
digraph biological_workflow {
  # Set up the graph attributes
  graph [layout = dot, rankdir = TB]

  # Define consistent node styles
  node [shape = rectangle, style = filled, color = lightblue, fontsize = 12]

  # Define nodes for each step
  StartNode [label = 'Ground Thruth - String Regulatory Network', shape = oval, color = seagreen, fontcolor = black]
  AdjacencyMatrix [label = 'Thruth Adjacency Matrix', shape = rectangle, color = seagreen]
  SimulateData [label = 'Simulate Single-Cell Data', shape = rectangle, color = goldenrod]

  # Reconstruction using Three Packages
  LateIntegration [label = 'Late\nIntegration', shape = oval, color = khaki]
  EarlyIntegration [label = 'Early\nIntegration', shape = oval, color = khaki]
  Jointanalysis [label = 'Joint\nanalysis', shape = oval, color = khaki]
  

  # Process 
  earlyj [label = 'earlyj.R', shape=diamond, color=lightblue, fontcolor=black]
  networkinference [label = 'infer_networks.R\nGENIE3\nGRNBoost2\nZILGM\nJRF', shape = rectangle, color = goldenrod, fontcolor=black]
  symmetrize [label = 'symmetrize.R', shape = rectangle, color = goldenrod, fontcolor=black]
  plotROC [label = 'plotROC.R', shape=diamond, color=lightblue, fontcolor=black]
  generateadjacency [label='generate_adjacency.R\nWeighted Adjacency', shape=rectangle, color=goldenrod, fontcolor=black]
  cutoffadjacency [label='cutoff_adjacency.R\nBinary Adjacency', shape=rectangle, color=goldenrod, fontcolor=black]
  pscores [label='pscores.R\nTPR\nFPR\nF1\nAccuracy\nPrecision', shape=diamond, color=lightblue, fontcolor=black]
  voting [label='Voting\nUnion\nIntersection', shape=diamond, color=lightblue, fontcolor=black]
  plotgcompare  [label='plotg.R\ncompare_consesus.R\nPlot Graphs', shape=rectangle, color=goldenrod, fontcolor=black]

  # Define the workflow structure
  StartNode -> AdjacencyMatrix
  AdjacencyMatrix -> SimulateData
  SimulateData -> LateIntegration
  SimulateData -> EarlyIntegration
  SimulateData -> Jointanalysis
  EarlyIntegration -> earlyj
  earlyj -> networkinference
  LateIntegration -> networkinference
  Jointanalysis -> networkinference
  networkinference -> symmetrize
  symmetrize -> plotROC
  symmetrize -> generateadjacency
  generateadjacency -> cutoffadjacency
  cutoffadjacency -> pscores
  cutoffadjacency -> voting
  voting -> plotgcompare
  voting -> pscores
}
")

svg_code <- export_svg(grViz_output)
rsvg::rsvg_png(charToRaw(svg_code), "./../analysis/flowchart.png")

grViz_output

Tcell Ground Truth

adjm <- read.table("./../data/allblood_adjacency_matrix.csv", header = T, row.names = 1, sep = ",") %>% as.matrix()
diag(adjm) <- 0

adjm %>%
    datatable(extensions = 'Buttons',
            options = list(
              dom = 'Bfrtip',
              buttons = c('csv', 'excel'),
              scrollX = TRUE,
              pageLength = 10), 
            caption = "Ground Truth")
gtruth <- igraph::graph_from_adjacency_matrix(adjm, mode = "undirected", diag = F)

num_nodes <- vcount(gtruth)
num_edges <- ecount(gtruth)

set.seed(1234)
plot(gtruth, 
     main = paste("Ground Truth\nNodes:", num_nodes, "Edges:", num_edges),
     vertex.label.color = "black",
     vertex.size = 6, 
     edge.width = 2, 
     vertex.label = NA,
     vertex.color = "steelblue",
     layout = igraph::layout_with_fr)

Simulate Data

ncell <- 500
nodes <- nrow(adjm)

set.seed(1130)
mu_values <- c(3, 6, 9)
theta_values <- c(1, 0.7, 0.5)

count_matrices <- lapply(1:3, function(i) {
  set.seed(1130 + i)
  mu_i <- mu_values[i]
  theta_i <- theta_values[i]
  
  count_matrix_i <- simdata(n = ncell, p = nodes, B = adjm, family = "ZINB", 
                            mu = mu_i, mu_noise = 1, theta = theta_i, pi = 0.2)
  
  count_matrix_df <- as.data.frame(count_matrix_i)
  colnames(count_matrix_df) <- colnames(adjm)
  rownames(count_matrix_df) <- paste("cell", 1:nrow(count_matrix_df), sep = "")
  
  return(count_matrix_df)
})

count_matrices[[1]] %>%
    datatable(extensions = 'Buttons',
            options = list(
              dom = 'Bfrtip',
              buttons = c('csv', 'excel'),
              scrollX = TRUE,
              pageLength = 10), 
            caption = "Simulated count matrix")
saveRDS(count_matrices, "./../analysis/count_matrices.RDS")

Matrices Integration

Late Integration

GENIE3

set.seed(1234)
time[["GENIE3_late_15Cores"]] <- system.time(
  genie3_late <- infer_networks(count_matrices, method="GENIE3", nCores = 15)
)

saveRDS(genie3_late, "./../analysis/genie3_late.RDS")

genie3_late[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 output")

symmetrize Output and ROC

genie3_late_wadj <- generate_adjacency(genie3_late, ground.truth = adjm)
sgenie3_late_wadj <- symmetrize(genie3_late_wadj, weight_function = "mean")
genie3_late_auc <- plotROC(sgenie3_late_wadj, adjm, plot_title = "ROC curve - GENIE3 Late Integration")

sgenie3_late_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 symmetrize output")

Generate Adjacency and Apply Cutoff

sgenie3_late_adj <- cutoff_adjacency(count_matrices = count_matrices,
                 weighted_adjm_list = sgenie3_late_wadj, 
                 ground.truth = adjm,
                 n = 3,
                 method = "GENIE3",
                 nCores = 15)
## Matrix 1 Mean 95th Percentile Cutoff: 0.004953592 
## Matrix 2 Mean 95th Percentile Cutoff: 0.004929168 
## Matrix 3 Mean 95th Percentile Cutoff: 0.004974256
sgenie3_late_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 adjacency")

Comparison with the Ground Truth

scores.genie3.late.all <- pscores(adjm, sgenie3_late_adj)

scores.genie3.late.all$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgenie3_late_adj)

consesusm <- create_consensus(sgenie3_late_adj, method="vote")
consesusu <- create_consensus(sgenie3_late_adj, method="union")
consesunet <- create_consensus(adj_matrix_list = sgenie3_late_adj, weighted_list = sgenie3_late_wadj, method = "INet", threshold = 0.05, ncores = 15)
## [1] 0.3226695
## [1] 0.05773287
scores.genie3.late <- pscores(adjm, list(consesusm))

scoresu.genie3.late <- pscores(adjm, list(consesusu))

scoresnet.genie3.late <- pscores(adjm, list(consesunet))

scores.genie3.late$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores vote")
scores.genie3.late$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores union")
scoresnet.genie3.late$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores INet")
ajm_compared <- compare_consensus(consesusm, adjm)

ajm_compared <- compare_consensus(consesusu, adjm)

ajm_compared <- compare_consensus(consesunet, adjm)

GRNBoost2

set.seed(1234)
time[["GRNBoost_late"]] <- system.time(
  grnb_late <- infer_networks(count_matrices, method="GRNBoost2")
)

saveRDS(grnb_late, "./../analysis/grnb_late.RDS")

grnb_late[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 output")

symmetrize Output and ROC

grnb_late_wadj <- generate_adjacency(grnb_late, ground.truth = adjm)
sgrnb_late_wadj <- symmetrize(grnb_late_wadj, weight_function = "mean")
grnb_late_auc <- plotROC(sgrnb_late_wadj, adjm, plot_title = "ROC curve - GRNBoost2 Late Integration")

sgrnb_late_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 symmetrize output")

Generate Adjacency and Apply Cutoff

sgrnb_late_adj <- cutoff_adjacency(count_matrices = count_matrices,
                 weighted_adjm_list = sgrnb_late_wadj, 
                 ground.truth = adjm,
                 n = 3,
                 method = "GRNBoost2")
## Matrix 1 Mean 95th Percentile Cutoff: 0.5680854 
## Matrix 2 Mean 95th Percentile Cutoff: 0.5665772 
## Matrix 3 Mean 95th Percentile Cutoff: 0.5628565
sgrnb_late_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 adjacency")

Comparison with the Ground Truth

scores.grn.late.all <- pscores(adjm, sgrnb_late_adj)

scores.grn.late.all$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgrnb_late_adj)

consesusm <- create_consensus(sgrnb_late_adj, method="vote")
consesusu <- create_consensus(sgrnb_late_adj, method="union")
consesunet <- create_consensus(adj_matrix_list = sgrnb_late_adj, weighted_list = sgrnb_late_wadj, method = "INet", threshold = 0.05, ncores = 15)
## [1] 0.3184693
## [1] 0.03758124
scores.grn.late <- pscores(adjm, list(consesusm))

scoresu.grn.late <- pscores(adjm, list(consesusu))

scoresnet.grn.late <- pscores(adjm, list(consesunet))

scores.grn.late$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores vote")
scoresu.grn.late$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores union")
scoresnet.grn.late$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores INet")
ajm_compared <- compare_consensus(consesusm, adjm)

ajm_compared <- compare_consensus(consesusu, adjm)

ajm_compared <- compare_consensus(consesunet, adjm)

ZILGM Park

set.seed(1234)
time[["ZILGM_late_15Cores"]] <- system.time(
  zilgm_late <- infer_networks(count_matrices_list = count_matrices, method = "ZILGM", adjm = adjm, nCores = 15)
)
## learning for NBII graphical model 
## nlambda : 50
## penalty function : LASSO
## update type : IRLS
## Conducting sampling in progress :  10 % 
Conducting sampling in progress :  20 % 
Conducting sampling in progress :  30 % 
Conducting sampling in progress :  40 % 
Conducting sampling in progress :  50 % 
Conducting sampling in progress :  60 % 
Conducting sampling in progress :  70 % 
Conducting sampling in progress :  80 % 
Conducting sampling in progress :  90 % 
Conducting sampling in progress :  100 % 
learning for NBII graphical model 
## nlambda : 50
## penalty function : LASSO
## update type : IRLS
## Conducting sampling in progress :  10 % 
Conducting sampling in progress :  20 % 
Conducting sampling in progress :  30 % 
Conducting sampling in progress :  40 % 
Conducting sampling in progress :  50 % 
Conducting sampling in progress :  60 % 
Conducting sampling in progress :  70 % 
Conducting sampling in progress :  80 % 
Conducting sampling in progress :  90 % 
Conducting sampling in progress :  100 % 
learning for NBII graphical model 
## nlambda : 50
## penalty function : LASSO
## update type : IRLS
## Conducting sampling in progress :  10 % 
Conducting sampling in progress :  20 % 
Conducting sampling in progress :  30 % 
Conducting sampling in progress :  40 % 
Conducting sampling in progress :  50 % 
Conducting sampling in progress :  60 % 
Conducting sampling in progress :  70 % 
Conducting sampling in progress :  80 % 
Conducting sampling in progress :  90 % 
Conducting sampling in progress :  100 % 
Assigned lamb to lambda_results[[ 1 ]] with 50 matrices.
## Assigned lamb to lambda_results[[ 2 ]] with 50 matrices.
## Assigned lamb to lambda_results[[ 3 ]] with 50 matrices.
saveRDS(zilgm_late, "./../analysis/zilgm_late.RDS")

est_graphs <- zilgm_late$network_results
lambdas <- zilgm_late$lambda_results
plotROC(lambdas[[1]], adjm, plot_title = "Lambda ROC Matrix1", is_binary = T)
plotROC(lambdas[[2]], adjm, plot_title = "Lambda ROC Matrix2", is_binary = T)
plotROC(lambdas[[3]], adjm, plot_title = "Lambda ROC Matrix3", is_binary = T)
consensus_matrices <- vector("list", 50)

for (i in 1:50) {
  ranklambda <- list(lambdas[[1]][[i]], lambdas[[2]][[i]], lambdas[[3]][[i]])
  consensus_matrices[[i]] <- create_consensus(ranklambda, method="vote")
}

zilgm_late_auc <- plotROC(consensus_matrices, adjm, plot_title = "ROC voting lambda", is_binary = T)

Comparison with the Ground Truth

scores.zilgm.late.all <- pscores(adjm, est_graphs)

scores.zilgm.late.all$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(est_graphs)

consesusm <- create_consensus(est_graphs, method="vote")
consesusu <- create_consensus(est_graphs, method="union")

scores.zilgm.late <- pscores(adjm, list(consesusm))

scoresu.zilgm.late <- pscores(adjm, list(consesusu))

scores.zilgm.late$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
scoresu.zilgm.late$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
ajm_compared <- compare_consensus(consesusm, adjm)

ajm_compared <- compare_consensus(consesusu, adjm)

PCzinb

#tictoc::tic("PCzinb late")
#zinbres <- infer_networks(count_matrices_list = count_matrices, 
#                          method = "PCzinb",
#                          adjm = adjm)
#execution_times[['PCzinb late']] <- tictoc::toc(log = TRUE)$toc[[1]]
#consesusm <- create_consensus(zinbres, method="vote")
#consesusu <- create_consensus(zinbres, method="union")

#scores.pc.late <- pscores(adjm, list(consesusm))
#scoresu.pc.late <- pscores(adjm, list(consesusu))

#scores.pc.late$Statistics %>%
#    datatable(extensions = 'Buttons',
#              options = list(
#                dom = 'Bfrtip',
#                buttons = c('csv', 'excel'),
#                scrollX = TRUE,
#                pageLength = 10), 
#              caption = "scores vote")

#scoresu.pc.late$Statistics %>%
#    datatable(extensions = 'Buttons',
#              options = list(
#                dom = 'Bfrtip',
#                buttons = c('csv', 'excel'),
#                scrollX = TRUE,
#                pageLength = 10), 
#              caption = "scores union")
#ajm_compared <- compare_consensus(consesusm, adjm)
#ajm_compared <- compare_consensus(consesusu, adjm)

Early Integration

early_matrix <- list(earlyj(count_matrices))

GENIE3

set.seed(1234)
time[["GENIE3_early_15Cores"]] <- system.time(
  genie3_early <- infer_networks(early_matrix, method="GENIE3", nCores = 15)
)

saveRDS(genie3_early, "./../analysis/genie3_early.RDS")

genie3_early[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 output")

symmetrize Output and ROC

genie3_early_wadj <- generate_adjacency(genie3_early, ground.truth = adjm)
sgenie3_early_wadj <- symmetrize(genie3_early_wadj, weight_function = "mean")
genie3_early_auc <- plotROC(sgenie3_early_wadj, adjm, plot_title = "ROC curve - GENIE3 Early Integration")

sgenie3_early_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 symmetrize output")

Generate Adjacency and Apply Cutoff

sgenie3_early_adj <- cutoff_adjacency(count_matrices = early_matrix,
                 weighted_adjm_list = sgenie3_early_wadj, 
                 ground.truth = adjm,
                 n = 2,
                 method = "GENIE3",
                 nCores = 15)
## Matrix 1 Mean 95th Percentile Cutoff: 0.005266203
sgenie3_early_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 adjacency")

Comparison with the Ground Truth

scores.genie3.early <- pscores(adjm, sgenie3_early_adj)

scores.genie3.early$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgenie3_early_adj)

ajm_compared <- compare_consensus(sgenie3_early_adj[[1]], adjm)

GRNBoost2

set.seed(1234)
start_time <- Sys.time()
time[["GRNBoost2_early"]] <- system.time(
  grnb_early <- infer_networks(early_matrix, method="GRNBoost2")
)

saveRDS(grnb_early, "./../analysis/grnb_early.RDS")

grnb_early[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 output")

symmetrize Output and ROC

grnb_early_wadj <- generate_adjacency(grnb_early, ground.truth = adjm)
sgrnb_early_wadj <- symmetrize(grnb_early_wadj, weight_function = "mean")
grnb_early_auc <- plotROC(sgrnb_early_wadj, adjm, plot_title = "ROC curve - GRNBoost2 Early Integration")

grnb_early_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 symmetrize output")

Generate Adjacency and Apply Cutoff

sgrnb_early_adj <- cutoff_adjacency(count_matrices = early_matrix,
                 weighted_adjm_list = sgrnb_early_wadj, 
                 ground.truth = adjm,
                 n = 2,
                 method = "GRNBoost2")
## Matrix 1 Mean 95th Percentile Cutoff: 2.537023
sgrnb_early_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 adjacency")

Comparison with the Ground Truth

scores.grn.early <- pscores(adjm, sgrnb_early_adj)

scores.grn.early$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgrnb_early_adj)

ajm_compared <- compare_consensus(sgrnb_early_adj[[1]], adjm)

ZILGM Park

set.seed(1234)

time[["ZILGM_early_15Cores"]] <- system.time(
  zilgm_late <- infer_networks(count_matrices_list = early_matrix, method = "ZILGM", adjm = adjm, nCores = 15)
)
## learning for NBII graphical model 
## nlambda : 50
## penalty function : LASSO
## update type : IRLS
## Conducting sampling in progress :  10 % 
Conducting sampling in progress :  20 % 
Conducting sampling in progress :  30 % 
Conducting sampling in progress :  40 % 
Conducting sampling in progress :  50 % 
Conducting sampling in progress :  60 % 
Conducting sampling in progress :  70 % 
Conducting sampling in progress :  80 % 
Conducting sampling in progress :  90 % 
Conducting sampling in progress :  100 % 
Assigned lamb to lambda_results[[ 1 ]] with 50 matrices.
saveRDS(zilgm_late, "./../analysis/zilgm_early.RDS")

est_graphs <- zilgm_late$network_results
lambdas <- zilgm_late$lambda_results
zilgm_early_auc <- plotROC(lambdas[[1]], adjm, plot_title = "ROC ZILGM early", is_binary = T)

Comparison with the Ground Truth

scores.zilgm.early <- pscores(adjm, est_graphs)

scores.zilgm.early$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
ajm_compared <- compare_consensus(est_graphs[[1]], adjm)

PCzinb

#tictoc::tic("PCzinb early")
#zinbres <- infer_networks(count_matrices_list = early_matrix, 
#                          method = "PCzinb",
#                          adjm = adjm)
#execution_times[['PCzinb early']] <- tictoc::toc(log = TRUE)$toc[[1]]
#scores.pc.early <- pscores(adjm, zinbres)

#scores.pc.early$Statistics %>%
#    datatable(extensions = 'Buttons',
#              options = list(
#               dom = 'Bfrtip',
#                buttons = c('csv', 'excel'),
#                scrollX = TRUE,
#                pageLength = 10), 
#             caption = "scores early")
#ajm_compared <- compare_consensus(zinbres[[1]], adjm)

Joint Integration

Joint Random Forest

#https://cran.r-project.org/src/contrib/Archive/JRF/
#install.packages("/home/francescoc/Downloads/JRF_0.1-4.tar.gz", repos = NULL, type = "source")
set.seed(1234)
time[["JRF"]] <- system.time(
  jrf_mat <- infer_networks(count_matrices, method="JRF")
)
 
jrf_mat[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF output")

Their Permutation

#jrf_matrices <- lapply(count_matrices, t)
#jrf_matrices_norm <- lapply(jrf_matrices,function(x) {
#  (x - mean(x)) / sd(x)
#  })

#genes <- rownames(jrf_matrices_norm[[1]])

#set.seed(1234)
#out.perm <- Run_permutation(jrf_matrices_norm,mtry=round(sqrt(length(genes)-1)),ntree=500, genes,5)

#myJRF_network <- function(out.jrf, out.perm, TH) {
  
#  nclasses <- dim(out.perm)[3]
#  M <- dim(out.perm)[2]
#  out <- vector("list", nclasses)  

#  for (net in 1:nclasses) { 
#    j.np <- sort(out.jrf[, 2 + net], decreasing = TRUE)
#    FDR <- matrix(0, dim(out.perm)[1], 1)
    
#    th <- NULL  
#    for (s in 1:length(j.np)) { 
#      FP <- sum(sum(out.perm[, , net] >= j.np[s])) / M
#      FDR[s] <- FP / s
      
#      if (FDR[s] > TH) {
#        th <- j.np[s]
#        break
#      }
#    }
    
#    out[[net]] <- out.jrf[out.jrf[, 2 + net] > th, seq(1, 2)]
#  }
  
#  return(out)
#}

#mynet <- myJRF_network(jrf_mat[[1]],out.perm,0.05)
#mynet
#jrf_adj <- function(df_list, adjm) {
  # Ensure the genes (row and column names) are in the adjacency matrix
#  genes <- rownames(adjm)
  
  # Initialize a list to store adjacency matrices
#  adjacency_matrices <- list()
  
  # Loop through each data frame in the list
#  for (i in seq_along(df_list)) {
#    df <- df_list[[i]]
    
    # Initialize a new adjacency matrix with zeros
#    adj_matrix <- adjm * 0  # Reset to zero for each df
    
    # Update the adjacency matrix for the gene pairs
#    for (j in seq_len(nrow(df))) {
#      gene1 <- df$gene1[j]
#      gene2 <- df$gene2[j]
      
      # Check if both genes are present in the adjacency matrix
#      if (gene1 %in% genes && gene2 %in% genes) {
#        idx1 <- which(genes == gene1)  # Find the row index for gene1
#        idx2 <- which(genes == gene2)  # Find the column index for gene2
        
        # Update the adjacency matrix to set an edge
#        adj_matrix[idx1, idx2] <- 1
#        adj_matrix[idx2, idx1] <- 1  # Ensure symmetry
#      } else {
#        warning(paste("Gene pair not found in adjm:", gene1, "-", gene2))
#      }
#    }
    
    # Store the resulting adjacency matrix in the list
#    adjacency_matrices[[i]] <- adj_matrix
#  }
  
  # Return the list of adjacency matrices
#  return(adjacency_matrices)
#}


#jrf_adj_mynet <- jrf_adj(mynet, adjm)
#jrf_auc_perm <- plotROC(jrf_adj_mynet, adjm, plot_title = "ROC curve - JRF perm out", is_binary = T)
#scores.jrf.perm <- pscores(adjm, jrf_adj_mynet)

#scores.jrf.perm$Statistics %>%
#    datatable(extensions = 'Buttons',
#              options = list(
#                dom = 'Bfrtip',
#                buttons = c('csv', 'excel'),
#                scrollX = TRUE,
#                pageLength = 10), 
#              caption = "scores")
#plots <- plotg(jrf_adj_mynet)
#consesusu <- create_consensus(jrf_adj_mynet, method="union")

#scoresu.jrf.perm <- pscores(adjm, list(consesusu))

#scoresu.jrf.perm$Statistics %>%
#    datatable(extensions = 'Buttons',
#              options = list(
#                dom = 'Bfrtip',
#                buttons = c('csv', 'excel'),
#                scrollX = TRUE,
#                pageLength = 10), 
#              caption = "scores")
#ajm_compared <- compare_consensus(consesusu, adjm)

Prepare the output

jrf_list <- list()

importance_columns <- grep("importance", names(jrf_mat[[1]]), value = TRUE)

for (i in seq_along(importance_columns)) {
# Select the 'gene1', 'gene2', and the current 'importance' column
  df <- jrf_mat[[1]][, c("gene1", "gene2", importance_columns[i])]
  
  # Rename the importance column to its original name (e.g., importance1, importance2, etc.)
  names(df)[3] <- importance_columns[i]
  
  # Add the data frame to the output list
  jrf_list[[i]] <- df
}

saveRDS(jrf_list, "./../analysis/jrf.RDS")

jrf_list[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF output")

symmetrize Output and ROC

jrf_wadj <- generate_adjacency(jrf_list, ground.truth = adjm)
sjrf_wadj <- symmetrize(jrf_wadj, weight_function = "mean")
jrf_auc_mine <- plotROC(sjrf_wadj, adjm, plot_title = "ROC curve - JRF Late Integration")

sjrf_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF symmetrize output")

Generate Adjacency and Apply Cutoff

sjrf_adj <- cutoff_adjacency(count_matrices = count_matrices,
                 weighted_adjm_list = sjrf_wadj, 
                 ground.truth = adjm,
                 n = 3,
                 method = "JRF")
## Matrix 1 Mean 95th Percentile Cutoff: 2.469836 
## Matrix 2 Mean 95th Percentile Cutoff: 2.461713 
## Matrix 3 Mean 95th Percentile Cutoff: 2.481421
sjrf_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF adjacency")

Comparison with the Ground Truth

scores.jrf.all <- pscores(adjm, sjrf_adj)

scores.jrf.all$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sjrf_adj)

consesusm <- create_consensus(sjrf_adj, method="vote")
consesusu <- create_consensus(sjrf_adj, method="union")
consesunet <- create_consensus(adj_matrix_list = sjrf_adj, weighted_list = sjrf_wadj, method = "INet", threshold = 0.1, ncores = 15)
## [1] 0.3513283
## [1] 0.1024187
## [1] 0.02673072
scores.jrf <- pscores(adjm, list(consesusm))

scoresu.jrf <- pscores(adjm, list(consesusu))

scoresnet.jrf <- pscores(adjm, list(consesunet))

scores.jrf$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores vote")
scoresu.jrf$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores union")
scoresnet.jrf$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores INet")
ajm_compared <- compare_consensus(consesusm, adjm)

ajm_compared <- compare_consensus(consesusu, adjm)

ajm_compared <- compare_consensus(consesunet, adjm)

Method Comparison

time_data <- data.frame(
  Method = names(time),
  Time_in_Seconds = sapply(time, function(x) {
    if ("elapsed" %in% names(x)) x["elapsed"] else NA
  })
)

time_data$Time_in_Minutes <- as.numeric(time_data$Time_in_Seconds) / 60
time_data$Time_in_Hours <- as.numeric(time_data$Time_in_Seconds) / 3600

time_data <- time_data[order(time_data$Time_in_Hours), ]
time_data$Method <- factor(time_data$Method, levels = time_data$Method)

time_data <- time_data %>%
  mutate(Method_Group = case_when(
    grepl("GENIE3", Method) ~ "GENIE3",
    grepl("GRNBoost2", Method) ~ "GRNBoost2",
    grepl("ZILGM", Method) ~ "ZILGM",
    grepl("JRF", Method) ~ "JRF",
    grepl("PCzinb", Method) ~ "PCzinb"
  ))

method_colors <- c("GENIE3" = "darkblue", "GRNBoost2" = "darkgreen", "ZILGM" = "orange", "JRF" = "red", "PCzinb" = "purple")

time_plot <- ggplot(time_data, aes(x = Method, y = Time_in_Hours, fill = Method_Group)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = sprintf("%.1f min", Time_in_Minutes)), vjust = -0.5) +
  labs(title = "Execution Time for Each Method", y = "Time (Hours)", x = NULL) +
  scale_fill_manual(values = method_colors) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

print(time_plot) 

mplot <- list()

for (k in c("TPR", "FPR", "Precision", "F1", "Accuracy")) {
  mplot[[k]] <- data.frame(
    GENIE3_late = scores.genie3.late$Statistics[[k]],
    GENIE3_early = scores.genie3.early$Statistics[[k]],
    GRNBoost2_late = scores.grn.late$Statistics[[k]],
    GRNBoost2_early = scores.grn.early$Statistics[[k]],
    ZILGM_late = scores.zilgm.late$Statistics[[k]],
    ZILGM_early = scores.zilgm.early$Statistics[[k]],
    #PCzinb_late = scores.pc.late$Statistics[[k]],
    #PCzinb_early = scores.pc.early$Statistics[[k]],
    JRF = scores.jrf$Statistics[[k]]
  )
}

mplot[["AUC"]] <- data.frame(
    GENIE3_late = mean(genie3_late_auc$AUC),
    GENIE3_early = genie3_early_auc$AUC,
    GRNBoost2_late = mean(grnb_late_auc$AUC),
    GRNBoost2_early = grnb_early_auc$AUC,
    ZILGM_late = zilgm_late_auc$AUC,
    ZILGM_early = zilgm_early_auc$AUC,
    JRF = jrf_auc_mine$AUC
)

plot_data <- bind_rows(lapply(names(mplot), function(metric) {
  data.frame(
    Metric = metric,
    Method = names(mplot[[metric]]),
    Value = as.numeric(mplot[[metric]][1, ])
  )
}))

plot_data <- plot_data %>%
  mutate(Method_Group = case_when(
    grepl("GENIE3", Method) ~ "GENIE3",
    grepl("GRNBoost2", Method) ~ "GRNBoost2",
    grepl("ZILGM", Method) ~ "ZILGM",
    grepl("JRF", Method) ~ "JRF"
  )) %>%
  mutate(Method = factor(Method, levels = c(
    "GENIE3_early", "GENIE3_late", 
    "GRNBoost2_early", "GRNBoost2_late", 
    "ZILGM_early", "ZILGM_late","PCzinb_early", "PCzinb_late", 
    "JRF"  # Ensure JRF is last
  )))

plots <- lapply(unique(plot_data$Metric), function(metric) {
  show_x_text <- metric %in% c("Accuracy", "AUC")
  
  ggplot(plot_data %>% filter(Metric == metric), aes(x = Method, y = Value, fill = Method_Group)) +
    geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
    labs(title = metric, y = "Value", x = NULL) +
    scale_y_continuous(limits = c(0, 1)) +  
    scale_fill_manual(values = method_colors) +
    theme_minimal() +
    theme(
      axis.text.x = if (show_x_text) element_text(angle = 45, hjust = 1) else element_blank(),
      axis.title.x = element_blank(),
      legend.position = "none"  
    )
})

final_plot <- (plots[[1]] | plots[[2]]) /
              (plots[[3]] | plots[[4]]) /
              (plots[[5]] | plots[[6]]) +
  plot_layout(guides = "collect") & 
  theme(legend.position = "bottom")

print(final_plot)

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=it_IT.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=it_IT.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=it_IT.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=it_IT.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] doRNG_1.8.2        rngtools_1.5.2     INetTool_0.1.0     scales_1.2.0      
##  [5] patchwork_1.1.1    ZILGM_0.1.1        RColorBrewer_1.1-3 rsvg_2.6.1        
##  [9] DiagrammeRsvg_0.1  JRF_0.1-4          pROC_1.18.0        DiagrammeR_1.0.11 
## [13] gridExtra_2.3      reshape2_1.4.4     rbenchmark_1.0.0   learn2count_0.3.2 
## [17] reticulate_1.34.0  DT_0.22            forcats_0.5.1      stringr_1.4.0     
## [21] dplyr_1.0.9        purrr_0.3.4        readr_2.1.2        tidyr_1.2.0       
## [25] tibble_3.1.7       ggplot2_3.3.6      tidyverse_1.3.1    igraph_2.0.3      
## [29] doParallel_1.0.17  iterators_1.0.14   foreach_1.5.2      GENIE3_1.16.0     
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.0                backports_1.4.1            
##   [3] plyr_1.8.7                  splines_4.1.0              
##   [5] crosstalk_1.2.0             GenomeInfoDb_1.30.1        
##   [7] digest_0.6.29               htmltools_0.5.2            
##   [9] fansi_1.0.3                 magrittr_2.0.3             
##  [11] tzdb_0.3.0                  modelr_0.1.8               
##  [13] matrixStats_0.62.0          colorspace_2.0-3           
##  [15] rvest_1.0.2                 haven_2.5.0                
##  [17] xfun_0.30                   crayon_1.5.1               
##  [19] RCurl_1.98-1.6              jsonlite_1.8.0             
##  [21] graph_1.72.0                survival_3.3-1             
##  [23] glue_1.6.2                  gtable_0.3.0               
##  [25] zlibbioc_1.40.0             distributions3_0.2.2       
##  [27] XVector_0.34.0              DelayedArray_0.20.0        
##  [29] V8_6.0.0                    car_3.0-13                 
##  [31] shape_1.4.6                 SingleCellExperiment_1.16.0
##  [33] BiocGenerics_0.40.0         abind_1.4-5                
##  [35] pscl_1.5.9                  DBI_1.1.2                  
##  [37] rstatix_0.7.0               Rcpp_1.0.8.3               
##  [39] datastructures_0.2.9        stats4_4.1.0               
##  [41] glmnet_4.1-8                htmlwidgets_1.5.4          
##  [43] httr_1.4.3                  WeightSVM_1.7-16           
##  [45] ellipsis_0.3.2              farver_2.1.0               
##  [47] pkgconfig_2.0.3             sass_0.4.1                 
##  [49] dbplyr_2.1.1                utf8_1.2.2                 
##  [51] labeling_0.4.2              tidyselect_1.1.2           
##  [53] rlang_1.1.4                 munsell_0.5.0              
##  [55] cellranger_1.1.0            tools_4.1.0                
##  [57] visNetwork_2.1.2            cli_3.3.0                  
##  [59] generics_0.1.2              statnet.common_4.10.0      
##  [61] broom_0.8.0                 evaluate_0.15              
##  [63] fastmap_1.1.0               yaml_2.3.5                 
##  [65] bst_0.3-24                  knitr_1.39                 
##  [67] fs_1.5.2                    caTools_1.18.2             
##  [69] xml2_1.3.3                  mpath_0.4-2.26             
##  [71] multinet_4.2.1              compiler_4.1.0             
##  [73] rstudioapi_0.13             curl_4.3.2                 
##  [75] png_0.1-7                   ggsignif_0.6.3             
##  [77] reprex_2.0.1                bslib_0.3.1                
##  [79] stringi_1.7.6               highr_0.9                  
##  [81] lattice_0.20-45             iZID_0.0.1                 
##  [83] Matrix_1.6-1.1              gbm_2.2.2                  
##  [85] vctrs_0.4.1                 pillar_1.7.0               
##  [87] lifecycle_1.0.1             jquerylib_0.1.4            
##  [89] bitops_1.0-7                GenomicRanges_1.46.1       
##  [91] R6_2.5.1                    network_1.18.2             
##  [93] IRanges_2.28.0              codetools_0.2-18           
##  [95] MASS_7.3-57                 assertthat_0.2.1           
##  [97] SummarizedExperiment_1.24.0 withr_2.5.0                
##  [99] S4Vectors_0.32.4            GenomeInfoDbData_1.2.7     
## [101] hms_1.1.1                   grid_4.1.0                 
## [103] rpart_4.1.16                coda_0.19-4                
## [105] flux_0.3-0.1                rmarkdown_2.14             
## [107] MatrixGenerics_1.6.0        carData_3.0-5              
## [109] ggpubr_0.4.0                numDeriv_2016.8-1.1        
## [111] Biobase_2.54.0              lubridate_1.8.0